# author: Han Zhang
##########################################################
######################## left panel of Figure 9 #################################


## This plot is for the validation data.
## It plots the precision/recall curve for classifiers 1 and 2 combined,
## and compares out-of-sample validation with cross-validation.


library(ggplot2)

# Output settings passed to ggsave() when the figure is written to disk.
DPI <- 300     # resolution handed to ggsave(dpi = ...)
WIDTH <- 7     # figure width, expressed in UNIT; the height is derived from it
UNIT <- "in"   # unit used for both width and height


# ---- Load and assemble the data behind the precision/recall figure ----
#
# Produces two objects used by the plotting code:
#   df     - data frame with (at least) the columns label, precision, recall;
#            label is a factor with the three series in legend order
#   labels - legend labels, in the same order as the factor levels

# Precision/recall table that contains the "C2, text and image" series.
# Its threshold column is removed before binding (presumably so its columns
# line up with the cross-validation table for rbind() -- confirm).
df2 <- read.csv("secondstage_c2_text_image_precision_recall.csv",
                sep = ",", stringsAsFactors = FALSE)
df2$threshold <- NULL

# Precision/recall table that contains the "Cross-Validation" series.
dfc <- read.csv("c1c2_precision_recall_crossvalidation.csv",
                sep = ",", stringsAsFactors = FALSE)

# Keep only the two series that are drawn in this panel.
df <- rbind(df2[df2$label == "C2, text and image", ],
            dfc[dfc$label == "Cross-Validation", ])

# Fail loudly instead of silently drawing a figure with a missing curve.
missing_cols <- setdiff(c("label", "precision", "recall"), names(df))
if (length(missing_cols) > 0) {
  stop("Input tables lack required column(s): ",
       paste(missing_cols, collapse = ", "), call. = FALSE)
}
missing_series <- setdiff(c("C2, text and image", "Cross-Validation"), df$label)
if (length(missing_series) > 0) {
  stop("No rows found for series: ",
       paste(missing_series, collapse = ", "), call. = FALSE)
}

# Normalise column types once, before any rows are added.
df$label <- as.character(df$label)
df$precision <- as.numeric(df$precision)
df$recall <- as.numeric(df$recall)

# Rename the out-of-sample series to the label shown in the legend.
df$label[df$label == "C2, text and image"] <- "Out-of-sample Validation"

# Constant-precision "Random Guess" baseline spanning recall 0..1.
# The rows are built as a typed data frame and matched to df by column name,
# so the numeric columns stay numeric and the result does not depend on the
# column order of the CSV files. Two endpoints are enough to draw the line.
# NOTE(review): the previous version appended these rows positionally as
# c("Random Guess", 0.3862, <recall>, 0.5); this assumes the second and third
# columns were precision and recall -- confirm against the CSV headers.
# NOTE(review): 0.3862 is presumably the share of positive cases in the
# validation data -- confirm.
random_guess <- data.frame(
  label = "Random Guess",
  precision = 0.3862,
  recall = c(0, 1),
  stringsAsFactors = FALSE
)

# Any other column of df (the old code wrote 0.5 there) is not used by the
# plot, so it is left NA for the baseline rows.
extra_cols <- setdiff(names(df), names(random_guess))
if (length(extra_cols) > 0) {
  random_guess[extra_cols] <- NA
}
df <- rbind(df, random_guess[names(df)])

# Legend labels; the same vector fixes the factor level order so that the
# manual colour/linetype values are assigned to the intended series.
labels <- c("Out-of-sample Validation", "Cross-Validation", "Random Guess")
df$label <- factor(df$label, levels = labels)



# Precision (y) against recall (x), one line per value of `label`.
# Colour and line type both encode `label`; giving both scales the same name
# and labels yields a single legend titled "Classifier".
ggplot(df, aes(x = recall, y = precision, colour = label, linetype = label)) +
  geom_line(size = 2) +
  scale_color_manual(
    name = "Classifier",
    values = c("black", "grey20", "grey50"),
    labels = labels
  ) +
  scale_linetype_manual(
    name = "Classifier",
    values = c("solid", "dotted", "dotdash"),
    labels = labels
  ) +
  ylim(0, 1) +
  theme_bw() +
  theme(
    # no grid lines or panel fill; black axis lines
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black"),
    # text sizes
    axis.text = element_text(size = 20, colour = "black"),
    axis.title = element_text(size = 28, colour = "black"),
    plot.title = element_text(size = 20, colour = "black"),
    # legend: large text, wide keys so the line types are distinguishable,
    # placed inside the panel at relative position (0.5, 0.2)
    legend.title = element_text(size = 24),
    legend.text = element_text(size = 26),
    legend.key.width = unit(3, "line"),
    legend.key.height = unit(2, "line"),
    legend.position = c(0.5, 0.2)
  )


# Write the most recently created plot to PDF; the height is WIDTH / 1.2.
ggsave(
  filename = "c1c2_vs_cross_validation_precision_recall.pdf",
  plot = last_plot(),
  width = WIDTH,
  height = WIDTH / 1.2,
  units = UNIT,
  dpi = DPI
)
